# =============================================================================
# April 24 2025
# R code used to produce all results, tables, and figures in the SI Online Appendix G.3
# Rebecca Cordell
# Unpacking the Role of In-Group Bias in US Public Opinion on Human Rights Violations
# American Journal of Political Science
# https://doi.org/10.7910/DVN/TGAL7M
# =============================================================================

# Clear work environment
# Removes every object returned by ls() in the environment the script is run
# from; attached packages and options() settings are not reset by this call
rm(list = ls())

# Install Packages
#install.packages("dplyr")
#install.packages("ggplot2")
#install.packages("forcats")
#install.packages("ggpubr")
#install.packages("ggeasy")
#install.packages("lemon")
#install.packages("sandwich")
#install.packages("survey")
#install.packages("lmtest")
#install.packages("remotes")
#remotes::install_version("cregg", version = "0.4.0")
#install.packages("nnet")
#install.packages("stargazer")
#install.packages("lmtest")

# Required Packages
library("dplyr")
library("ggplot2")
library("forcats")
library("ggpubr")
library("sandwich")
library("survey")
library("lmtest")
library("cregg")
library("ggeasy")
library("lemon")
library("nnet")
library("stargazer")
library("lmtest")

# Session options, set in one call:
#   scipen = 999 strongly favours fixed over scientific notation when printing
#   warn   = -1  suppresses all warnings for the rest of the session
options(scipen = 999, warn = -1)

# -----------------------------------------------------------------------------
# Read in data
# -----------------------------------------------------------------------------

# Text columns are read as character vectors (not factors)
hr_survey <- read.csv(
  file = "cordell_ingroupbiashumanrights_data.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# =============================================================================
# Table G.3.1: Perpetrator (Partisanship) Randomization Balance Check, Multinomial Logit Regression
# =============================================================================

# Set level order for group identity attributes and some control variables
# (the first level listed is the reference category in the regressions below)
hr_survey <- hr_survey %>%
  mutate(
    perp = factor(
      perp,
      levels = c(
        "A Democrat governor",
        "The Democrat president",
        "The Republican president",
        "A Republican governor"
      )
    ),
    targ_race = factor(
      targ_race,
      levels = c("asian", "black", "hispanic", "middle eastern", "white")
    ),
    targ_relig = factor(
      targ_relig,
      levels = c("buddhist", "christian", "hindu", "jewish", "muslim")
    ),
    targ_citiz = factor(
      targ_citiz,
      levels = c(
        "American citizens",
        "naturalized American citizens",
        "immigrants with legal status",
        "immigrants without legal status"
      )
    ),
    # Shorten the Amnesty International label before fixing the level order
    elite = factor(
      fct_recode(
        elite,
        "Amnesty International" = "Amnesty International, an international nonprofit organization,"
      ),
      levels = c(
        "The American Civil Liberties Union, an American nonprofit organization,",
        "Amnesty International",
        "A Democrat member of Congress",
        "A Republican member of Congress"
      )
    ),
    resp_gender = factor(resp_gender, levels = c("Male", "Female")),
    # Binary education indicator: "College"/"Post-grad" are "Higher", every
    # other value is "Lower" (%in% returns FALSE for NA, so a missing resp_educ
    # is also coded "Lower")
    resp_educ_highlow = factor(
      ifelse(resp_educ %in% c("College", "Post-grad"), "Higher", "Lower"),
      levels = c("Lower", "Higher")
    )
  )

# -----------------------------------------------------------------------------
# Conduct randomization balance checks for perpetrator group identity attribute
# -----------------------------------------------------------------------------

# Multinomial logit of the assigned perpetrator level on respondent age,
# gender, education (high/low), and resp_dem
perp.random <- multinom(
  as.factor(perp) ~
    as.numeric(resp_age) +
    as.factor(resp_gender) +
    as.factor(resp_educ_highlow) +
    as.factor(resp_dem),
  data = hr_survey
)

# -----------------------------------------------------------------------------
# Create table
# -----------------------------------------------------------------------------

# Print Table G.3.1 (HTML written to table_g3_1.html; model statistics and
# model numbers are omitted)
stargazer(
  perp.random,
  type = "html",
  out = "table_g3_1.html",
  covariate.labels = c("Age", "Female", "High Education", "Democrat"),
  omit.stat = "all",
  model.numbers = FALSE
)

# =============================================================================
# Table G.3.2: Target (Race) Attribute Randomization Balance Check, Multinomial Logit Regression
# =============================================================================

# -----------------------------------------------------------------------------
# Conduct randomization balance check for target race group identity attribute
# -----------------------------------------------------------------------------

# Multinomial logit of the assigned target race level on respondent age,
# gender, education (high/low), and resp_dem
targ_race.random <- multinom(
  as.factor(targ_race) ~
    as.numeric(resp_age) +
    as.factor(resp_gender) +
    as.factor(resp_educ_highlow) +
    as.factor(resp_dem),
  data = hr_survey
)

# -----------------------------------------------------------------------------
# Create table
# -----------------------------------------------------------------------------

# Print Table G.3.2 (HTML written to table_g3_2.html; model statistics omitted)
stargazer(
  targ_race.random,
  type = "html",
  out = "table_g3_2.html",
  covariate.labels = c("Age", "Female", "High Education", "Democrat"),
  omit.stat = "all"
)

# =============================================================================
# Table G.3.3: Target (Religion) Attribute Randomization Balance Check, Multinomial Logit Regression
# =============================================================================

# -----------------------------------------------------------------------------
# Conduct randomization balance check for target religion group identity attribute
# -----------------------------------------------------------------------------

# Multinomial logit of the assigned target religion level on respondent age,
# gender, education (high/low), and resp_dem
targ_relig.random <- multinom(
  as.factor(targ_relig) ~
    as.numeric(resp_age) +
    as.factor(resp_gender) +
    as.factor(resp_educ_highlow) +
    as.factor(resp_dem),
  data = hr_survey
)

# -----------------------------------------------------------------------------
# Create table
# -----------------------------------------------------------------------------

# Print Table G.3.3 (HTML written to table_g3_3.html; model statistics omitted)
stargazer(
  targ_relig.random,
  type = "html",
  out = "table_g3_3.html",
  covariate.labels = c("Age", "Female", "High Education", "Democrat"),
  omit.stat = "all"
)

# =============================================================================
# Table G.3.4: Target (Citizenship) Attribute Randomization Balance Check, Multinomial Logit Regression
# =============================================================================

# -----------------------------------------------------------------------------
# Conduct randomization balance check for target citizenship group identity attribute
# -----------------------------------------------------------------------------

# Multinomial logit of the assigned target citizenship level on respondent age,
# gender, education (high/low), and resp_dem
targ_citiz.random <- multinom(
  as.factor(targ_citiz) ~
    as.numeric(resp_age) +
    as.factor(resp_gender) +
    as.factor(resp_educ_highlow) +
    as.factor(resp_dem),
  data = hr_survey
)

# -----------------------------------------------------------------------------
# Create table
# -----------------------------------------------------------------------------

# Print Table G.3.4 (HTML written to table_g3_4.html; model statistics omitted)
stargazer(
  targ_citiz.random,
  type = "html",
  out = "table_g3_4.html",
  covariate.labels = c("Age", "Female", "High Education", "Democrat"),
  omit.stat = "all"
)

# =============================================================================
# Table G.3.5: Elite Cue (Partisanship) Attribute Randomization Balance Check, Multinomial Logit Regression
# =============================================================================

# -----------------------------------------------------------------------------
# Conduct randomization balance check for elite cue group identity attribute
# -----------------------------------------------------------------------------

# Multinomial logit of the assigned elite cue level on respondent age,
# gender, education (high/low), and resp_dem
elite.random <- multinom(
  as.factor(elite) ~
    as.numeric(resp_age) +
    as.factor(resp_gender) +
    as.factor(resp_educ_highlow) +
    as.factor(resp_dem),
  data = hr_survey
)

# Print Table G.3.5 (HTML written to table_g3_5.html; model statistics omitted)
stargazer(
  elite.random,
  type = "html",
  out = "table_g3_5.html",
  covariate.labels = c("Age", "Female", "High Education", "Democrat"),
  omit.stat = "all"
)

# =============================================================================
# Figure G.3: Effect of Group Identity Attributes, Randomization Balance Check Controlling for Key Respondent Demographics (Age, Gender, Education, and Party Identification)
# =============================================================================

# Variables entering the marginal-means models below, all stored as factors
factor_vars <- c("perp_match", "agent", "type", "scope", "targ_nonstate",
                 "targ_race_match", "targ_relig_match", "targ_citiz_match",
                 "frame", "elite_match", "resp_age_oldyoung", "resp_gender",
                 "resp_educ_highlow", "resp_dem")

hr_survey <- hr_survey %>%
  # Create age dummy variable: 49 and above is "Older", below 49 is "Younger"
  # (a missing resp_age stays NA)
  mutate(resp_age_oldyoung = ifelse(resp_age >= 49, "Older", "Younger")) %>%
  # Convert regression variables into factors
  mutate(across(all_of(factor_vars), as.factor))

# -----------------------------------------------------------------------------
# Calculate marginal means
# -----------------------------------------------------------------------------

# Group identity dummy variables whose marginal means are kept for the figure
group_vars <- c("perp_match", "targ_race_match",
                "targ_relig_match", "targ_citiz_match",
                "elite_match")

# Model 1: marginal means for outcome1, with `id` passed as the id variable
fig_g3_m1 <- cregg::cj(
  hr_survey,
  outcome1 ~ perp_match + agent + type + scope + targ_nonstate +
    targ_race_match + targ_relig_match + targ_citiz_match +
    frame + elite_match +
    resp_age_oldyoung + resp_gender + resp_educ_highlow + resp_dem,
  id = ~id,
  estimate = "mm"
)

# Model 2: same specification for outcome2
fig_g3_m2 <- cregg::cj(
  hr_survey,
  outcome2 ~ perp_match + agent + type + scope + targ_nonstate +
    targ_race_match + targ_relig_match + targ_citiz_match +
    frame + elite_match +
    resp_age_oldyoung + resp_gender + resp_educ_highlow + resp_dem,
  id = ~id,
  estimate = "mm"
)

# Subset group identity dummy variables in both sets of estimates
fig_g3_m1 <- fig_g3_m1[fig_g3_m1$feature %in% group_vars, ]
fig_g3_m2 <- fig_g3_m2[fig_g3_m2$feature %in% group_vars, ]

# -----------------------------------------------------------------------------
# Prepare figures
# -----------------------------------------------------------------------------

# Combine models
fig_g3_all <- rbind(fig_g3_m1, fig_g3_m2)

# Create group identity variable labels
variable_labels <- c(
  "perp_match" = "Perpetrator (partisanship)",
  "targ_race_match" = "Target (race)",
  "targ_relig_match" = "Target (religion)",
  "targ_citiz_match" = "Target (citizenship)",
  "elite_match" = "Elite cue (partisanship)"
)

# Order in which the levels appear in the figure (heading row, in-group,
# out-group for each attribute)
level_order <- c(
  "Perpetrator (partisanship)", "perp_in", "perp_out",
  "Target (race)", "race_in", "race_out",
  "Target (religion)", "relig_in", "relig_out",
  "Target (citizenship)", "citiz_in", "citiz_out",
  "Elite cue (partisanship)", "elite_in", "elite_out"
)

# Append one heading row per attribute and outcome; these rows carry the
# attribute label as their level and no estimate or interval (NA)
fig_g3_all <- dplyr::bind_rows(
  fig_g3_all,
  data.frame(
    outcome = rep(c("outcome1", "outcome2"), times = length(variable_labels)),
    variable = rep(unname(variable_labels), each = 2),
    level = rep(unname(variable_labels), each = 2),
    estimate = NA, lower = NA, upper = NA
  )
)

fig_g3_all <- fig_g3_all %>%
  mutate(
    # Create respondents variable (in-groups vs. out-groups); heading rows are
    # assigned to "In-group"
    Respondents = case_when(
      grepl("_in", level) ~ "In-group",
      grepl("_out", level) ~ "Out-group",
      level %in% variable_labels ~ "In-group",
      TRUE ~ level
    ),
    Respondents = factor(Respondents, levels = c("In-group", "Out-group")),
    # Set factor order (reversed, since the plots flip the coordinates)
    level = fct_rev(factor(level, levels = level_order))
  )

# Separate models
fig_g3_m1 <- fig_g3_all %>% filter(outcome == "outcome1")
fig_g3_m2 <- fig_g3_all %>% filter(outcome == "outcome2")

# -----------------------------------------------------------------------------
# Create figures
# -----------------------------------------------------------------------------

# Build one panel of Figure G.3: marginal means with error bars, one row per
# level, coloured by in-group/out-group.
#
# Arguments:
#   panel_data     data frame with columns level, estimate, lower, upper and
#                  Respondents (the estimates for a single outcome)
#   panel_title    title printed (centred) above the panel
#   reference_line optional marginal-mean value; when given, a vertical
#                  reference line is drawn there and the estimate axis is made
#                  symmetric around it. NULL (default) adds neither.
#
# Returns the ggplot object for the panel.
plot_g3_panel <- function(panel_data, panel_title, reference_line = NULL) {
  # Axis labels are matched to the levels of `level` by position: only the
  # middle entry of each group of three is labelled with the attribute name.
  # Assumes `level` carries the 15 levels in the reversed order set when the
  # figure data were prepared.
  tick_labels <- c(
    "", "Elite cue (partisanship)", "",
    "", "Target (citizenship)", "",
    "", "Target (religion)", "",
    "", "Target (race)", "",
    "", "Perpetrator (partisanship)", ""
  )

  # Optional components; adding NULL to a ggplot leaves it unchanged
  reference_layers <- NULL
  if (!is.null(reference_line)) {
    reference_layers <- list(
      geom_hline(yintercept = reference_line, size = 0.2, color = "black"),
      scale_y_symmetric(mid = reference_line)
    )
  }

  ggplot(panel_data, aes(x = level, y = estimate, colour = Respondents)) +
    geom_point() +
    geom_errorbar(aes(ymin = lower, ymax = upper), size = 0.3, width = 0.2) +
    scale_x_discrete(labels = tick_labels) +
    xlab('') + ylab('Marginal mean') +
    coord_flip() +
    reference_layers +
    theme_classic(base_size = 10) +
    scale_colour_grey() +
    easy_center_title() +
    ggtitle(panel_title) +
    theme(
      axis.text.x = element_text(colour = "black"),
      axis.text.y = element_text(colour = "black"),
      axis.title.x = element_text(margin = margin(t = 10)),
      axis.ticks.y = element_blank()
    ) +
    labs(color = NULL)
}

# Create panel (a) Disapproval forced-choice
# (reference line at 0.5, axis symmetric around 0.5)
fig_g3_m1_plot <- plot_g3_panel(
  fig_g3_m1,
  "(a) Disapproval forced-choice",
  reference_line = 0.5
)

# Create panel (b) Disapproval ratings-based
# (no reference line, default axis limits)
fig_g3_m2_plot <- plot_g3_panel(
  fig_g3_m2,
  "(b) Disapproval ratings-based"
)

# Print Figure G.3
# The arranged figure is print()ed explicitly: values of top-level expressions
# are not auto-printed when the script is run through source() with its
# defaults, in which case nothing would be drawn on the PDF device
pdf("figure_g3.pdf", width = 8.26, height = 5.82)
print(
  ggarrange(fig_g3_m1_plot, NULL, fig_g3_m2_plot,
            nrow = 1, ncol = 3, widths = c(1, 0.09, 1),
            common.legend = TRUE, legend = "bottom")
)
dev.off()

# =============================================================================
# In-text Results
# =============================================================================

# "I find no  significant associations, indicating that the attribute levels are jointly balanced in these tests: the omnibus test statistic for Age is 0.62, for Female is 24.49, for High Education is 23.97, and for Democrat is 24.83."
# Each statistic is print()ed explicitly so that it is also shown when the
# script is run through source(), which does not auto-print top-level values.

# the omnibus test statistic for Age is 0.62
# (F-statistic of a linear regression of age on the attributes in the formula)
age.random <- summary(
  lm(
    as.numeric(resp_age) ~
      as.factor(perp) + as.factor(agent) + as.factor(type) +
      as.factor(scope) + as.factor(targ_nonstate) + as.factor(targ_race) +
      as.factor(targ_relig) + as.factor(targ_citiz) + as.factor(frame) +
      as.factor(elite),
    data = hr_survey
  )
)$fstatistic
age.random.omnibus <- round(age.random[1], digits = 2)
print(age.random.omnibus)

# for Female is 24.49
# (likelihood-ratio chi-squared of the logit model; lrtest() on a single model
# compares it with the intercept-only model)
resp_gender.random <- glm(
  as.factor(resp_gender) ~
    as.factor(perp) + as.factor(agent) + as.factor(type) +
    as.factor(scope) + as.factor(targ_nonstate) + as.factor(targ_race) +
    as.factor(targ_relig) + as.factor(targ_citiz) + as.factor(frame) +
    as.factor(elite),
  data = hr_survey,
  family = binomial
)
resp_gender.random.omnibus <- round(lrtest(resp_gender.random), digits = 2)
print(resp_gender.random.omnibus$Chisq[2])

# for High Education is 23.97
educ.random <- glm(
  as.factor(resp_educ_highlow) ~
    as.factor(perp) + as.factor(agent) + as.factor(type) +
    as.factor(scope) + as.factor(targ_nonstate) + as.factor(targ_race) +
    as.factor(targ_relig) + as.factor(targ_citiz) + as.factor(frame) +
    as.factor(elite),
  data = hr_survey,
  family = binomial
)
educ.random.omnibus <- round(lrtest(educ.random), digits = 2)
print(educ.random.omnibus$Chisq[2])

# for Democrat is 24.83
political.random <- glm(
  as.factor(resp_dem) ~
    as.factor(perp) + as.factor(agent) + as.factor(type) +
    as.factor(scope) + as.factor(targ_nonstate) + as.factor(targ_race) +
    as.factor(targ_relig) + as.factor(targ_citiz) + as.factor(frame) +
    as.factor(elite),
  data = hr_survey,
  family = binomial
)
political.random.omnibus <- round(lrtest(political.random), digits = 2)
print(political.random.omnibus$Chisq[2])